Prepare spatially-resolved transcriptomic data

# Fetch the Xenium ovarian-cancer output bundle (unless it is already on disk),
# unpack it into temp/, and run `data-beans from-zarr` on the cell-feature
# matrix. Everything is skipped when temp/data.zarr already exists
# (presumably the directory that `--output temp/data` produces -- confirm).
zip=Xenium_Prime_Ovarian_Cancer_FFPE_XRrun_xe_outs.zip
url=https://cf.10xgenomics.com/samples/xenium/3.0.0/Xenium_Prime_Ovarian_Cancer_FFPE_XRrun/$zip

mkdir -p temp
if ! [ -d temp/data.zarr ] ; then
    if ! [ -f "$zip" ] ; then
        # -f: fail on HTTP errors instead of saving the error page as the zip.
        # -L: follow redirects.
        # Download to a .part file and rename only on success, so that an
        # interrupted transfer is never mistaken for a complete archive by the
        # [ -f ] guard on the next run.
        curl -fL -o "$zip.part" "$url" && mv "$zip.part" "$zip"
    fi
    # -o: overwrite without prompting, so a re-run after a partial extraction
    # does not stop at an interactive "replace?" question.
    unzip -o "$zip" -d temp
    data-beans from-zarr temp/cell_feature_matrix.zarr.zip \
        --backend zarr \
        --do-squeeze \
        --row-nnz-cutoff 10 \
        --column-nnz-cutoff 10 \
        --output temp/data
fi

Run delta-SVD

# Run `pinto dsvd` only when its latent output is not already on disk.
if ! [ -f temp/dsvd.latent.parquet ] ; then
    pinto dsvd temp/data.zarr \
        --coord temp/cells.zarr.zip \
        --coord-column-names cell_centroid_x,cell_centroid_y \
        -d 1024 \
        -t 7 \
        --preload-data \
        -o temp/dsvd
fi

This produces:

| Output file | Description |
|---|---|
| `dsvd.coord_pairs.parquet` | Spatial coordinates for each cell pair (edge) |
| `dsvd.basis.parquet` | SVD basis vectors (gene loadings, `@shared` / `@diff` suffixes) |
| `dsvd.latent.parquet` | Latent edge representations (L2-normalized) |
| `dsvd.propensity.parquet` | Soft cell-to-topic membership |
| `dsvd.gene_topic.parquet` | Per-topic gene expression rates |
| `dsvd.edge_cluster.parquet` | Edge-to-cluster assignments |

Visualize results in R

library(arrow)
library(data.table)
library(ggplot2)
library(patchwork)

# Make a minimal theme with a 12-pt base font the default for later plots.
theme_set(new = theme_minimal(base_size = 12))

Load delta-SVD results

# Read three of the delta-SVD parquet outputs from temp/ as data.tables.
dsvd_prop <- as.data.table(
    read_parquet(file.path("temp", "dsvd.propensity.parquet"))
)
dsvd_gene <- as.data.table(
    read_parquet(file.path("temp", "dsvd.gene_topic.parquet"))
)
dsvd_basis <- as.data.table(
    read_parquet(file.path("temp", "dsvd.basis.parquet"))
)

Load coordinates

# Per-cell table from the unpacked Xenium bundle; the plotting code below uses
# its cell_id, cell_centroid_x and cell_centroid_y columns.
coords <- as.data.table(
    read_parquet(file.path("temp", "cells.parquet"))
)

Delta-SVD: cell topic map

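  • Each cell has a soft loading (propensity) vector across latent topics. We draw each cell at its centroid once for every topic whose propensity exceeds 0.1, coloured by topic and sized by propensity, so a cell's dominant topic appears as its largest point. Topics are labelled by their top marker genes in the next section.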
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Build the long table behind the topic map: one row per (cell, topic) pair
# whose propensity exceeds 0.1, with the cell's columns from `coords` attached.
# `dsvd_prop` is already a data.table, so no conversion is needed before melt().
.dsvd <- dsvd_prop %>%
    melt(id.vars = c("cell", "cluster"), variable.name = "topic_label") %>%
    filter(value > .1) %>%
    rename(cell_id = cell) %>%
    # Name the key explicitly rather than relying on a natural join, which
    # would silently widen the key if the tables ever shared another column.
    # Each cell may contribute several topic rows but must match at most one
    # row of `coords`; `relationship` turns a duplicated cell_id into an error
    # instead of silently duplicating points.
    left_join(coords, by = "cell_id", relationship = "many-to-one") %>%
    # Drops cells without a match in `coords` (and any row with another NA).
    na.omit() %>%
    # Convert back to a data.table after the dplyr verbs.
    as.data.table()
## Joining with `by = join_by(cell_id)`
# Position = cell centroid, colour = topic, point size = propensity.
.aes <- aes(
    x = cell_centroid_x,
    y = cell_centroid_y,
    colour = topic_label,
    size = value
)

# Spatial scatter of cell-topic memberships. Points are rasterised at 300 dpi,
# drawn without an outline stroke, and kept small (size range 0-0.5) with the
# size legend hidden. The y axis is reversed (presumably to match the image
# orientation of the tissue -- confirm).
ggplot(data = .dsvd, mapping = .aes) +
    ggrastr::rasterize(geom_point(stroke = 0.0), dpi = 300) +
    scale_colour_brewer(palette = "Dark2") +
    scale_size_continuous(range = c(0, 0.5), guide = "none") +
    scale_y_reverse() +
    theme_void()

Delta-SVD: top genes per topic

# gene_topic is long format: row (gene), column (topic), mean, sd, log_mean
# Derive a display name by removing everything up to and including the first
# underscore in `row`.
dsvd_gene[, gene := sub("^[^_]+_", "", row)]

# Top 10 genes per topic by `mean`. head() returns fewer rows when a topic has
# fewer than 10 genes, whereas `[1:10]` would pad the result with all-NA rows.
top_genes <- dsvd_gene[, head(.SD[order(-mean)], 10L), by = column]

# The same gene can rank in several topics. Ordering the axis on `gene` alone
# gives one global order shared by all facets, so bars would not be sorted
# within each panel. Order on a per-topic key instead and strip the topic part
# again when labelling the axis.
top_genes[, gene_key := paste(gene, column, sep = "___")]
top_genes[, gene_key := reorder(gene_key, mean)]

ggplot(top_genes, aes(x = gene_key, y = mean)) +
    geom_col() +
    coord_flip() +
    # Show only the gene name, dropping the "___<topic>" disambiguator.
    scale_x_discrete(labels = function(key) sub("___.*$", "", key)) +
    facet_wrap(~ column, scales = "free_y") +
    labs(title = "Delta-SVD: top 10 genes per topic",
         x = NULL, y = "Expression rate")

Based on the top marker genes, the delta-SVD topics correspond to major cell populations in the ovarian cancer tumour microenvironment:

| Topic | Top markers | Interpretation |
|---|---|---|
| 0 | MYH11, MYL9, CNN1 | Smooth muscle cells |
| 1 | H19, PLXNB1, CD47, NORAD | Tumour epithelial cells |
| 2 | C7, DCN, CCN1 | Stromal / immune interface |
| 3 | BGN, LUM, COL5A1, DCN, POSTN | Desmoplastic stroma / cancer-associated fibroblasts (CAFs) |
| 4 | H19, CD47, LAPTM4B, YWHAZ | Tumour cells (overlaps topic 1) |
| 5 | C1QC, C7, MS4A6A | Macrophages / myeloid cells |
| 6 | AQP1, COL4A1, ADAMTS1 | Endothelium / vasculature |